import pandas as pd
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt
print('modules are imported')
modules are imported
# Load the per-country, per-day COVID-19 table; the path is resolved relative to the working directory.
df=pd.read_csv('full_grouped.csv')
# Preview the first rows to confirm the columns were parsed as expected.
df.head()
| Date | Country/Region | Confirmed | Deaths | Recovered | Active | New cases | New deaths | New recovered | WHO Region | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2020-01-22 | Afghanistan | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Eastern Mediterranean |
| 1 | 2020-01-22 | Albania | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Europe |
| 2 | 2020-01-22 | Algeria | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Africa |
| 3 | 2020-01-22 | Andorra | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Europe |
| 4 | 2020-01-22 | Angola | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Africa |
df.tail()
| Date | Country/Region | Confirmed | Deaths | Recovered | Active | New cases | New deaths | New recovered | WHO Region | |
|---|---|---|---|---|---|---|---|---|---|---|
| 35151 | 2020-07-27 | West Bank and Gaza | 10621 | 78 | 3752 | 6791 | 152 | 2 | 0 | Eastern Mediterranean |
| 35152 | 2020-07-27 | Western Sahara | 10 | 1 | 8 | 1 | 0 | 0 | 0 | Africa |
| 35153 | 2020-07-27 | Yemen | 1691 | 483 | 833 | 375 | 10 | 4 | 36 | Eastern Mediterranean |
| 35154 | 2020-07-27 | Zambia | 4552 | 140 | 2815 | 1597 | 71 | 1 | 465 | Africa |
| 35155 | 2020-07-27 | Zimbabwe | 2704 | 36 | 542 | 2126 | 192 | 2 | 24 | Africa |
df.shape
(35156, 10)
# Keep only the rows that report at least one confirmed case and at least one death.
df = df.loc[df['Confirmed'].gt(0) & df['Deaths'].gt(0)]
df.head()
| Date | Country/Region | Confirmed | Deaths | Recovered | Active | New cases | New deaths | New recovered | WHO Region | |
|---|---|---|---|---|---|---|---|---|---|---|
| 36 | 2020-01-22 | China | 548 | 17 | 28 | 503 | 0 | 0 | 0 | Western Pacific |
| 223 | 2020-01-23 | China | 643 | 18 | 30 | 595 | 95 | 1 | 2 | Western Pacific |
| 410 | 2020-01-24 | China | 920 | 26 | 36 | 858 | 277 | 8 | 6 | Western Pacific |
| 597 | 2020-01-25 | China | 1406 | 42 | 39 | 1325 | 486 | 16 | 3 | Western Pacific |
| 784 | 2020-01-26 | China | 2075 | 56 | 49 | 1970 | 669 | 14 | 10 | Western Pacific |
# Show every remaining row for India.
df.loc[df['Country/Region'].eq('India')]
| Date | Country/Region | Confirmed | Deaths | Recovered | Active | New cases | New deaths | New recovered | WHO Region | |
|---|---|---|---|---|---|---|---|---|---|---|
| 9242 | 2020-03-11 | India | 62 | 1 | 4 | 57 | 6 | 1 | 0 | South-East Asia |
| 9429 | 2020-03-12 | India | 73 | 1 | 4 | 68 | 11 | 0 | 0 | South-East Asia |
| 9616 | 2020-03-13 | India | 82 | 2 | 4 | 76 | 9 | 1 | 0 | South-East Asia |
| 9803 | 2020-03-14 | India | 102 | 2 | 4 | 96 | 20 | 0 | 0 | South-East Asia |
| 9990 | 2020-03-15 | India | 113 | 2 | 13 | 98 | 11 | 0 | 9 | South-East Asia |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 34300 | 2020-07-23 | India | 1288108 | 30601 | 817209 | 440298 | 49310 | 740 | 34602 | South-East Asia |
| 34487 | 2020-07-24 | India | 1337024 | 31358 | 849432 | 456234 | 48916 | 757 | 32223 | South-East Asia |
| 34674 | 2020-07-25 | India | 1385635 | 32060 | 885573 | 468002 | 48611 | 702 | 36141 | South-East Asia |
| 34861 | 2020-07-26 | India | 1435616 | 32771 | 917568 | 485277 | 49981 | 711 | 31995 | South-East Asia |
| 35048 | 2020-07-27 | India | 1480073 | 33408 | 951166 | 495499 | 44457 | 637 | 33598 | South-East Asia |
139 rows × 10 columns
# Animated world maps, one frame per value of 'Date': first cumulative
# confirmed cases, then cumulative deaths.
# The table runs through 2020-07-27, so the titles name that end date.
fig = px.choropleth(
    df,
    locations='Country/Region',
    locationmode='country names',
    color='Confirmed',
    animation_frame='Date',
)
fig.update_layout(title_text='Global Spread of Covid-19 from 22nd Jan 2020 to 27th July 2020')
fig.show()

fig = px.choropleth(
    df,
    locations='Country/Region',
    locationmode='country names',
    color='Deaths',
    animation_frame='Date',
)
fig.update_layout(title_text='Global Deaths of Covid-19 from 22nd Jan 2020 to 27th July 2020')
fig.show()
# Narrow the table down to China's rows for a closer look.
df_china = df.loc[df['Country/Region'].eq('China')]
df_china.head()
| Date | Country/Region | Confirmed | Deaths | Recovered | Active | New cases | New deaths | New recovered | WHO Region | |
|---|---|---|---|---|---|---|---|---|---|---|
| 36 | 2020-01-22 | China | 548 | 17 | 28 | 503 | 0 | 0 | 0 | Western Pacific |
| 223 | 2020-01-23 | China | 643 | 18 | 30 | 595 | 95 | 1 | 2 | Western Pacific |
| 410 | 2020-01-24 | China | 920 | 26 | 36 | 858 | 277 | 8 | 6 | Western Pacific |
| 597 | 2020-01-25 | China | 1406 | 42 | 39 | 1325 | 486 | 16 | 3 | Western Pacific |
| 784 | 2020-01-26 | China | 2075 | 56 | 49 | 1970 | 669 | 14 | 10 | Western Pacific |
Let's select the columns that we need.
df_china.pop('WHO Region') # Remove the 'WHO Region' column in place; pop() returns the removed column.
36 Western Pacific
223 Western Pacific
410 Western Pacific
597 Western Pacific
784 Western Pacific
...
34257 Western Pacific
34444 Western Pacific
34631 Western Pacific
34818 Western Pacific
35005 Western Pacific
Name: WHO Region, Length: 188, dtype: object
df_china.head()
| Date | Country/Region | Confirmed | Deaths | Recovered | Active | New cases | New deaths | New recovered | |
|---|---|---|---|---|---|---|---|---|---|
| 36 | 2020-01-22 | China | 548 | 17 | 28 | 503 | 0 | 0 | 0 |
| 223 | 2020-01-23 | China | 643 | 18 | 30 | 595 | 95 | 1 | 2 |
| 410 | 2020-01-24 | China | 920 | 26 | 36 | 858 | 277 | 8 | 6 |
| 597 | 2020-01-25 | China | 1406 | 42 | 39 | 1325 | 486 | 16 | 3 |
| 784 | 2020-01-26 | China | 2075 | 56 | 49 | 1970 | 669 | 14 | 10 |
Calculating the first derivative (day-over-day change) of the Confirmed column
# Plot cumulative confirmed cases alongside daily new cases for China.
px.line(df_china, x = 'Date' , y = ['Confirmed' ,'New cases'])
# Report the largest single-day value in the 'New cases' column.
print("Maximum New Cases on Single day = ",df_china['New cases'].max())
Maximum New Cases on Single day = 13628
# Plot cumulative deaths alongside daily new deaths for China.
px.line(df_china , x = 'Date' ,y = ['Deaths' , 'New deaths'])
# Largest single-day value in the 'New deaths' column.
df_china['New deaths'].max()
1290
df.head()
| Date | Country/Region | Confirmed | Deaths | Recovered | Active | New cases | New deaths | New recovered | WHO Region | |
|---|---|---|---|---|---|---|---|---|---|---|
| 36 | 2020-01-22 | China | 548 | 17 | 28 | 503 | 0 | 0 | 0 | Western Pacific |
| 223 | 2020-01-23 | China | 643 | 18 | 30 | 595 | 95 | 1 | 2 | Western Pacific |
| 410 | 2020-01-24 | China | 920 | 26 | 36 | 858 | 277 | 8 | 6 | Western Pacific |
| 597 | 2020-01-25 | China | 1406 | 42 | 39 | 1325 | 486 | 16 | 3 | Western Pacific |
| 784 | 2020-01-26 | China | 2075 | 56 | 49 | 1970 | 669 | 14 | 10 | Western Pacific |
# Largest day-over-day rise in cumulative confirmed cases for each country.
# A single groupby replaces one full-table boolean filter per country.
# sort=False keeps countries in order of first appearance, which is the same
# order df['Country/Region'].unique() produces.
max_rise_by_country = (
    df.groupby('Country/Region', sort=False)['Confirmed']
    .agg(lambda confirmed: confirmed.diff().max())
)

# Keep the original list names available for any later cells that use them.
Countries = list(max_rise_by_country.index)
max_new_cases = list(max_rise_by_country)

df_mnc = pd.DataFrame({'Country': Countries, 'Max_Cases': max_new_cases})
df_mnc.head()
| Country | Max_Cases | |
|---|---|---|
| 0 | China | 13628.0 |
| 1 | Philippines | 2486.0 |
| 2 | Japan | 978.0 |
| 3 | France | 26849.0 |
| 4 | Taiwan* | 27.0 |
df_mnc.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 171 entries, 0 to 170 Data columns (total 2 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Country 171 non-null object 1 Max_Cases 171 non-null float64 dtypes: float64(1), object(1) memory usage: 2.8+ KB
# One bar per country showing its largest daily rise; log_y=True puts the y axis on a log scale.
px.bar(df_mnc,x='Country',y='Max_Cases',color='Country',title = 'Global Maximum Covid Cases',log_y=True)
On 9 March 2020, the government of Italy under Prime Minister Giuseppe Conte imposed a national quarantine, restricting the movement of the population except for necessity, work, and health circumstances, in response to the growing pandemic of COVID-19 in the country. source
# Lockdown window for Italy: the start date, and the same day one calendar
# month later (kept as an ISO 'YYYY-MM-DD' string, like the 'Date' column).
italy_lockdown_start_date = '2020-03-09'
italy_lockdown_a_month_later = (
    pd.Timestamp(italy_lockdown_start_date) + pd.DateOffset(months=1)
).strftime('%Y-%m-%d')
df.head()
| Date | Country/Region | Confirmed | Deaths | Recovered | Active | New cases | New deaths | New recovered | WHO Region | |
|---|---|---|---|---|---|---|---|---|---|---|
| 36 | 2020-01-22 | China | 548 | 17 | 28 | 503 | 0 | 0 | 0 | Western Pacific |
| 223 | 2020-01-23 | China | 643 | 18 | 30 | 595 | 95 | 1 | 2 | Western Pacific |
| 410 | 2020-01-24 | China | 920 | 26 | 36 | 858 | 277 | 8 | 6 | Western Pacific |
| 597 | 2020-01-25 | China | 1406 | 42 | 39 | 1325 | 486 | 16 | 3 | Western Pacific |
| 784 | 2020-01-26 | China | 2075 | 56 | 49 | 1970 | 669 | 14 | 10 | Western Pacific |
let's get data related to italy
# Narrow the table down to Italy's rows.
df_italy = df.loc[df['Country/Region'].eq('Italy')]
Let's check the dataframe.
df_italy.head()
| Date | Country/Region | Confirmed | Deaths | Recovered | Active | New cases | New deaths | New recovered | WHO Region | |
|---|---|---|---|---|---|---|---|---|---|---|
| 5695 | 2020-02-21 | Italy | 20 | 1 | 0 | 19 | 17 | 1 | 0 | Europe |
| 5882 | 2020-02-22 | Italy | 62 | 2 | 1 | 59 | 42 | 1 | 1 | Europe |
| 6069 | 2020-02-23 | Italy | 155 | 3 | 2 | 150 | 93 | 1 | 1 | Europe |
| 6256 | 2020-02-24 | Italy | 229 | 7 | 1 | 221 | 74 | 4 | -1 | Europe |
| 6443 | 2020-02-25 | Italy | 322 | 10 | 1 | 311 | 93 | 3 | 0 | Europe |
ok! now let's do the visualization
# Daily new cases in Italy, with a vertical red marker at the lockdown start
# date and another one month after it.
fig = px.line(df_italy, x='Date', y='New cases', title='Before And After Lockdown in Italy')

# Markers run from 0 up to the tallest point of the curve; compute it once.
peak_new_cases = df_italy['New cases'].max()

lockdown_markers = [
    (italy_lockdown_start_date, 'Lockdown start'),
    (italy_lockdown_a_month_later, 'One month after lockdown start'),
]
for marker_date, marker_label in lockdown_markers:
    # Same x for both endpoints gives a vertical line at that date.
    fig.add_shape(
        dict(
            type='line',
            x0=marker_date, y0=0,
            x1=marker_date, y1=peak_new_cases,
            line=dict(color='red', width=3),
        )
    )
    # Label the marker at its top end.
    fig.add_annotation(
        dict(
            x=marker_date,
            y=peak_new_cases,
            text=marker_label,
        )
    )

# Render explicitly: `fig` is rebound to a different chart further down, so
# this figure would otherwise never be shown outside a notebook.
fig.show()
df_italy.head()
| Date | Country/Region | Confirmed | Deaths | Recovered | Active | New cases | New deaths | New recovered | WHO Region | |
|---|---|---|---|---|---|---|---|---|---|---|
| 5695 | 2020-02-21 | Italy | 20 | 1 | 0 | 19 | 17 | 1 | 0 | Europe |
| 5882 | 2020-02-22 | Italy | 62 | 2 | 1 | 59 | 42 | 1 | 1 | Europe |
| 6069 | 2020-02-23 | Italy | 155 | 3 | 2 | 150 | 93 | 1 | 1 | Europe |
| 6256 | 2020-02-24 | Italy | 229 | 7 | 1 | 221 | 74 | 4 | -1 | Europe |
| 6443 | 2020-02-25 | Italy | 322 | 10 | 1 | 311 | 93 | 3 | 0 | Europe |
Now let's plot a line chart to compare the impact of the national COVID-19 lockdown on the spread of the virus (daily new cases) and on the number of daily new deaths.
# Daily new cases and daily new deaths for Italy on a shared axis.
fig = px.line(df_italy, x='Date', y=['New cases', 'New deaths'])
fig.show()

# Scale each daily series by its own maximum so the peak day equals 1 and the
# two curves can be compared on one scale.
# assign() returns a new DataFrame, so nothing is written into a filtered
# slice of `df` and pandas raises no SettingWithCopyWarning.
df_italy = df_italy.assign(
    Normalized_Affection_Rate=df_italy['New cases'] / df_italy['New cases'].max(),
    Normalized_Death_Rate=df_italy['New deaths'] / df_italy['New deaths'].max(),
)
# Remove the 'WHO Region' column in place; pop() returns the removed column.
df_italy.pop('WHO Region')
C:\Users\SAURAV BEDSE\AppData\Local\Temp\ipykernel_16312\1512520190.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy C:\Users\SAURAV BEDSE\AppData\Local\Temp\ipykernel_16312\1512520190.py:2: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
5695 Europe
5882 Europe
6069 Europe
6256 Europe
6443 Europe
...
34306 Europe
34493 Europe
34680 Europe
34867 Europe
35054 Europe
Name: WHO Region, Length: 158, dtype: object
df_italy.head()
| Date | Country/Region | Confirmed | Deaths | Recovered | Active | New cases | New deaths | New recovered | Normalized_Affection_Rate | Normalized_Death_Rate | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 5695 | 2020-02-21 | Italy | 20 | 1 | 0 | 19 | 17 | 1 | 0 | 0.002593 | 0.001088 |
| 5882 | 2020-02-22 | Italy | 62 | 2 | 1 | 59 | 42 | 1 | 1 | 0.006405 | 0.001088 |
| 6069 | 2020-02-23 | Italy | 155 | 3 | 2 | 150 | 93 | 1 | 1 | 0.014183 | 0.001088 |
| 6256 | 2020-02-24 | Italy | 229 | 7 | 1 | 221 | 74 | 4 | -1 | 0.011286 | 0.004353 |
| 6443 | 2020-02-25 | Italy | 322 | 10 | 1 | 311 | 93 | 3 | 0 | 0.014183 | 0.003264 |
# Plot the two max-normalized series together so their shapes can be compared on one scale.
fig =px.line(df_italy,x='Date',y=['Normalized_Affection_Rate','Normalized_Death_Rate'])
fig.show()
Lockdown was started in Freiburg, Baden-Württemberg and Bavaria on 20 March 2020. Three days later, it was expanded to the whole of Germany
# Lockdown window for Germany: the nationwide start date, and the same day
# one calendar month later (kept as an ISO 'YYYY-MM-DD' string).
Germany_lockdown_start_date = '2020-03-23'
Germany_lockdown_a_month_later = (
    pd.Timestamp(Germany_lockdown_start_date) + pd.DateOffset(months=1)
).strftime('%Y-%m-%d')
let's select the data related to Germany
# Narrow the table down to Germany's rows.
df_germany = df.loc[df['Country/Region'].eq('Germany')]
let's check the dataframe
df_germany.head()
| Date | Country/Region | Confirmed | Deaths | Recovered | Active | New cases | New deaths | New recovered | WHO Region | |
|---|---|---|---|---|---|---|---|---|---|---|
| 8854 | 2020-03-09 | Germany | 1176 | 2 | 18 | 1156 | 136 | 2 | 0 | Europe |
| 9041 | 2020-03-10 | Germany | 1457 | 2 | 18 | 1437 | 281 | 0 | 0 | Europe |
| 9228 | 2020-03-11 | Germany | 1908 | 3 | 25 | 1880 | 451 | 1 | 7 | Europe |
| 9415 | 2020-03-12 | Germany | 2078 | 3 | 25 | 2050 | 170 | 0 | 0 | Europe |
| 9602 | 2020-03-13 | Germany | 3675 | 7 | 46 | 3622 | 1597 | 4 | 21 | Europe |
selecting the needed column
# Daily new cases in Germany, with a vertical red marker at the lockdown
# start date and another one month after it.
fig = px.line(df_germany, x='Date', y='New cases', title='Before And After Lockdown in germany')

# Markers run from 0 up to the tallest point of the curve; compute it once.
peak_new_cases = df_germany['New cases'].max()

lockdown_markers = [
    (Germany_lockdown_start_date, 'Lockdown start'),
    (Germany_lockdown_a_month_later, 'One month after lockdown start'),
]
for marker_date, marker_label in lockdown_markers:
    # Same x for both endpoints gives a vertical line at that date.
    fig.add_shape(
        dict(
            type='line',
            x0=marker_date, y0=0,
            x1=marker_date, y1=peak_new_cases,
            line=dict(color='red', width=3),
        )
    )
    # Label the marker at its top end.
    fig.add_annotation(
        dict(
            x=marker_date,
            y=peak_new_cases,
            text=marker_label,
        )
    )

# Render explicitly: `fig` is rebound to a different chart further down, so
# this figure would otherwise never be shown outside a notebook.
fig.show()
now let's plot the line chart
# Daily new cases and daily new deaths for Germany on a shared axis.
fig = px.line(df_germany,x='Date',y=['New cases','New deaths'])
fig.show()
let's do some scaling
# Scale each daily series by its own maximum so the peak day equals 1 and the
# two curves can be compared on one scale.
# assign() returns a new DataFrame, so nothing is written into a filtered
# slice of `df` and pandas raises no SettingWithCopyWarning.
df_germany = df_germany.assign(
    Normalized_Affection_Rate=df_germany['New cases'] / df_germany['New cases'].max(),
    Normalized_Death_Rate=df_germany['New deaths'] / df_germany['New deaths'].max(),
)
C:\Users\SAURAV BEDSE\AppData\Local\Temp\ipykernel_16312\754776824.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy C:\Users\SAURAV BEDSE\AppData\Local\Temp\ipykernel_16312\754776824.py:2: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
let's plot the line chart
# Plot the two max-normalized series together so their shapes can be compared on one scale.
fig =px.line(df_germany,x='Date',y=['Normalized_Affection_Rate','Normalized_Death_Rate'])
fig.show()